
* ==============================================================================
* Industry decomposition. 

* This file imports raw industry-level pollution and output data, cleans and
* aggregates to a cohesive set of industries, and stores as a .dta file. 

* Note that the folder structure in the raw data folder is preserved in the stata 
* data folder. 

* Note that if using the data files provided with the REStat replication 
* ("StationList_withCMA_SecondJoin.dta" and "PM25_NAPS_Data.dta") please start
* from Step 6. 

* Author: Nouri Najjar
* Created: August 2015
* Last Updated: November 2019
* ==============================================================================

* ------------------------------------------------------------
* Preamble: Define data directories. Must be adjusted accordingly
* ------------------------------------------------------------
* Set datadir
local datadir D:\REStat Data Archive\Industry Data\
* ------------------------------------------------------------

/*
* ----------------------------------------------------------------------------
* Step 1: Import and clean pollution data from APEI
* ----------------------------------------------------------------------------
forvalues i = 1990(1)2015 {
	* Import
	clear
	import excel using "`datadir'APEI Data\EN_2017APEI_`i'_NatProvTerr.xlsx", sheet("CA") cellrange(A3:R167) firstrow

	* Fix pulp and paper labeling
	gen dupPulp = cond(_N==1,0,_n) if SECTORS=="Pulp and Paper Industry"
	drop if dupPulp==88

	* Rename sectors
	gen SectorNew = "Chemicals" if SECTORS=="Biofuel Production" | SECTORS=="Chemicals Industry"
	replace SectorNew = "Abrasives"	if SECTORS=="Abrasives Manufacture"
	replace SectorNew = "Food"	if SECTORS=="Bakeries"| SECTORS=="Food Preparation" | SECTORS=="Grain Processing"
	replace SectorNew = "Electronics"	if SECTORS=="Electronics" | SECTORS=="Measuring, Medical and Controlling Devices Manufacturing"
	replace SectorNew = "Glass"	if SECTORS=="Glass Manufacture"
	replace SectorNew = "Plastics"	if SECTORS=="Plastics Manufacture"
	replace SectorNew = "Paper"	if SECTORS=="Pulp and Paper Industry" | SECTORS=="Converted Paper Product Manufacturing"
	replace SectorNew = "Textiles"	if SECTORS=="Textiles"
	replace SectorNew = "Transportation Equipment"	if SECTORS=="Vehicle Manufacture ( Engines, Parts, Assembly, Painting)" | SECTORS=="Ship & Boat Building & Repairing"
	replace SectorNew = "Wood"	if SECTORS=="Wood Products"
	replace SectorNew = "Rubber"	if SECTORS=="Rubber Manufacturing"
	replace SectorNew = "Asphalt"	if SECTORS=="Asphalt Shingle and Coating Material Manufacturing" | SECTORS=="Asphalt Paving Industry"
	replace SectorNew = "Cement and Concrete"	if SECTORS=="Cement Manufacture" | SECTORS=="Concrete Batching and Products" | SECTORS=="Brick Products"
	replace SectorNew = "Lime and Gypsum"	if SECTORS=="Lime Manufacture" | SECTORS=="Gypsum Product Manufacturing"
	replace SectorNew = "Other Minerals"	if SECTORS=="Clay Products" | SECTORS=="Other Mineral Products"
	replace SectorNew = "Petroleum Refining"	if SECTORS=="Petroleum Refining" | SECTORS=="Other Downstream Petroleum Industry"
	replace SectorNew = "Printing"	if SECTORS=="Printing"
	replace SectorNew = "Metals"	if SECTORS=="Metal Fabrication" | SECTORS=="Aluminum Industry" | SECTORS=="Foundries" | SECTORS=="Iron and Steel Industries" | SECTORS=="Non-Ferrous Mining and Smelting Industry"
	replace SectorNew = "Other"	if SECTORS=="Other Manufacturing Industries"

	* Keep required sectors
	keep if missing(SectorNew)==0

	* Drop unwanted variables
	keep SectorNew TPMT PM10T PM25T SOXT NOXT VOCT COT

	* Collapse to new sector level
	collapse (sum) TPMT PM10T PM25T SOXT NOXT VOCT COT, by(SectorNew)

	* Add year variable
	gen Year = `i'
	
	* Sort
	sort SectorNew
	
	* Save data
	save "`datadir'APEI Data\Pollution`i'.dta", replace

}

*local datadir D:\Google Drive\The Costs and Benefits of CEPA\Decomposition\

****
* Create pollution panel
****
* Open 1990 data for baseline, save
use "`datadir'APEI Data\Pollution1990.dta", clear
save "`datadir'APEI Data\PollutionPanel_1990to2015.dta", replace

* Append other years
forvalues i = 1991(1)2015 {
	append using "`datadir'APEI Data\Pollution`i'.dta"
	save "`datadir'APEI Data\PollutionPanel_1990to2015.dta", replace
}
****
* ----------------------------------------------------------------------------


* ----------------------------------------------------------------------------
* Step 2: Import and clean shipments data - Year-Month level
* ----------------------------------------------------------------------------
* Import
clear
import excel using "`datadir'CANSIM Data\NominalSales_92to2015.xlsx", cellrange(A293) firstrow

* Rename variables
{
rename COL0 Series
rename	COL1	y199201
rename	COL2	y199202
rename	COL3	y199203
rename	COL4	y199204
rename	COL5	y199205
rename	COL6	y199206
rename	COL7	y199207
rename	COL8	y199208
rename	COL9	y199209
rename	COL10	y199210
rename	COL11	y199211
rename	COL12	y199212
rename	COL13	y199301
rename	COL14	y199302
rename	COL15	y199303
rename	COL16	y199304
rename	COL17	y199305
rename	COL18	y199306
rename	COL19	y199307
rename	COL20	y199308
rename	COL21	y199309
rename	COL22	y199310
rename	COL23	y199311
rename	COL24	y199312
rename	COL25	y199401
rename	COL26	y199402
rename	COL27	y199403
rename	COL28	y199404
rename	COL29	y199405
rename	COL30	y199406
rename	COL31	y199407
rename	COL32	y199408
rename	COL33	y199409
rename	COL34	y199410
rename	COL35	y199411
rename	COL36	y199412
rename	COL37	y199501
rename	COL38	y199502
rename	COL39	y199503
rename	COL40	y199504
rename	COL41	y199505
rename	COL42	y199506
rename	COL43	y199507
rename	COL44	y199508
rename	COL45	y199509
rename	COL46	y199510
rename	COL47	y199511
rename	COL48	y199512
rename	COL49	y199601
rename	COL50	y199602
rename	COL51	y199603
rename	COL52	y199604
rename	COL53	y199605
rename	COL54	y199606
rename	COL55	y199607
rename	COL56	y199608
rename	COL57	y199609
rename	COL58	y199610
rename	COL59	y199611
rename	COL60	y199612
rename	COL61	y199701
rename	COL62	y199702
rename	COL63	y199703
rename	COL64	y199704
rename	COL65	y199705
rename	COL66	y199706
rename	COL67	y199707
rename	COL68	y199708
rename	COL69	y199709
rename	COL70	y199710
rename	COL71	y199711
rename	COL72	y199712
rename	COL73	y199801
rename	COL74	y199802
rename	COL75	y199803
rename	COL76	y199804
rename	COL77	y199805
rename	COL78	y199806
rename	COL79	y199807
rename	COL80	y199808
rename	COL81	y199809
rename	COL82	y199810
rename	COL83	y199811
rename	COL84	y199812
rename	COL85	y199901
rename	COL86	y199902
rename	COL87	y199903
rename	COL88	y199904
rename	COL89	y199905
rename	COL90	y199906
rename	COL91	y199907
rename	COL92	y199908
rename	COL93	y199909
rename	COL94	y199910
rename	COL95	y199911
rename	COL96	y199912
rename	COL97	y200001
rename	COL98	y200002
rename	COL99	y200003
rename	COL100	y200004
rename	COL101	y200005
rename	COL102	y200006
rename	COL103	y200007
rename	COL104	y200008
rename	COL105	y200009
rename	COL106	y200010
rename	COL107	y200011
rename	COL108	y200012
rename	COL109	y200101
rename	COL110	y200102
rename	COL111	y200103
rename	COL112	y200104
rename	COL113	y200105
rename	COL114	y200106
rename	COL115	y200107
rename	COL116	y200108
rename	COL117	y200109
rename	COL118	y200110
rename	COL119	y200111
rename	COL120	y200112
rename	COL121	y200201
rename	COL122	y200202
rename	COL123	y200203
rename	COL124	y200204
rename	COL125	y200205
rename	COL126	y200206
rename	COL127	y200207
rename	COL128	y200208
rename	COL129	y200209
rename	COL130	y200210
rename	COL131	y200211
rename	COL132	y200212
rename	COL133	y200301
rename	COL134	y200302
rename	COL135	y200303
rename	COL136	y200304
rename	COL137	y200305
rename	COL138	y200306
rename	COL139	y200307
rename	COL140	y200308
rename	COL141	y200309
rename	COL142	y200310
rename	COL143	y200311
rename	COL144	y200312
rename	COL145	y200401
rename	COL146	y200402
rename	COL147	y200403
rename	COL148	y200404
rename	COL149	y200405
rename	COL150	y200406
rename	COL151	y200407
rename	COL152	y200408
rename	COL153	y200409
rename	COL154	y200410
rename	COL155	y200411
rename	COL156	y200412
rename	COL157	y200501
rename	COL158	y200502
rename	COL159	y200503
rename	COL160	y200504
rename	COL161	y200505
rename	COL162	y200506
rename	COL163	y200507
rename	COL164	y200508
rename	COL165	y200509
rename	COL166	y200510
rename	COL167	y200511
rename	COL168	y200512
rename	COL169	y200601
rename	COL170	y200602
rename	COL171	y200603
rename	COL172	y200604
rename	COL173	y200605
rename	COL174	y200606
rename	COL175	y200607
rename	COL176	y200608
rename	COL177	y200609
rename	COL178	y200610
rename	COL179	y200611
rename	COL180	y200612
rename	COL181	y200701
rename	COL182	y200702
rename	COL183	y200703
rename	COL184	y200704
rename	COL185	y200705
rename	COL186	y200706
rename	COL187	y200707
rename	COL188	y200708
rename	COL189	y200709
rename	COL190	y200710
rename	COL191	y200711
rename	COL192	y200712
rename	COL193	y200801
rename	COL194	y200802
rename	COL195	y200803
rename	COL196	y200804
rename	COL197	y200805
rename	COL198	y200806
rename	COL199	y200807
rename	COL200	y200808
rename	COL201	y200809
rename	COL202	y200810
rename	COL203	y200811
rename	COL204	y200812
rename	COL205	y200901
rename	COL206	y200902
rename	COL207	y200903
rename	COL208	y200904
rename	COL209	y200905
rename	COL210	y200906
rename	COL211	y200907
rename	COL212	y200908
rename	COL213	y200909
rename	COL214	y200910
rename	COL215	y200911
rename	COL216	y200912
rename	COL217	y201001
rename	COL218	y201002
rename	COL219	y201003
rename	COL220	y201004
rename	COL221	y201005
rename	COL222	y201006
rename	COL223	y201007
rename	COL224	y201008
rename	COL225	y201009
rename	COL226	y201010
rename	COL227	y201011
rename	COL228	y201012
rename	COL229	y201101
rename	COL230	y201102
rename	COL231	y201103
rename	COL232	y201104
rename	COL233	y201105
rename	COL234	y201106
rename	COL235	y201107
rename	COL236	y201108
rename	COL237	y201109
rename	COL238	y201110
rename	COL239	y201111
rename	COL240	y201112
rename	COL241	y201201
rename	COL242	y201202
rename	COL243	y201203
rename	COL244	y201204
rename	COL245	y201205
rename	COL246	y201206
rename	COL247	y201207
rename	COL248	y201208
rename	COL249	y201209
rename	COL250	y201210
rename	COL251	y201211
rename	COL252	y201212
rename	COL253	y201301
rename	COL254	y201302
rename	COL255	y201303
rename	COL256	y201304
rename	COL257	y201305
rename	COL258	y201306
rename	COL259	y201307
rename	COL260	y201308
rename	COL261	y201309
rename	COL262	y201310
rename	COL263	y201311
rename	COL264	y201312
rename	COL265	y201401
rename	COL266	y201402
rename	COL267	y201403
rename	COL268	y201404
rename	COL269	y201405
rename	COL270	y201406
rename	COL271	y201407
rename	COL272	y201408
rename	COL273	y201409
rename	COL274	y201410
rename	COL275	y201411
rename	COL276	y201412
rename	COL277	y201501
rename	COL278	y201502
rename	COL279	y201503
rename	COL280	y201504
rename	COL281	y201505
rename	COL282	y201506
rename	COL283	y201507
rename	COL284	y201508
rename	COL285	y201509
rename	COL286	y201510
rename	COL287	y201511
rename	COL288	y201512
}





/*
* Create new sector definitions
gen SectorNew = "Chemicals" if NAICS=="325"
replace SectorNew = "Abrasives"	if NAICS=="32791"
replace SectorNew = "Food"	if NAICS=="311" | NAICS=="312"
replace SectorNew = "Electronics"	if NAICS=="334" | NAICS=="335"
replace SectorNew = "Glass"	if NAICS=="3272"
replace SectorNew = "Plastics"	if NAICS=="3261"
replace SectorNew = "Paper"	if NAICS=="322"
replace SectorNew = "Textiles"	if NAICS=="313" | NAICS=="314" | NAICS=="315"
replace SectorNew = "Transportation Equipment"	if NAICS=="336"
replace SectorNew = "Wood"	if NAICS=="321"
replace SectorNew = "Rubber"	if NAICS=="3262"
replace SectorNew = "Asphalt"	if NAICS=="32412"
replace SectorNew = "Cement and Concrete"	if NAICS=="3273"
replace SectorNew = "Lime and Gypsum"	if NAICS=="3274"
replace SectorNew = "Other Minerals"	if NAICS=="3271" | NAICS=="32799"
replace SectorNew = "Petroleum Refining"	if NAICS=="32411" | NAICS=="32419"
replace SectorNew = "Printing"	if NAICS=="323"
replace SectorNew = "Metals"	if NAICS=="331" | NAICS=="332"
replace SectorNew = "Other"	if NAICS=="316" | NAICS=="333" | NAICS=="337" | NAICS=="339"
*/

* Drop old sector
drop Series

* Recode NAICS
destring NAICS, replace

* Collapse to year-month on new sector
*collapse (sum) y*, by(SectorNew)

* Reshape from wide to long
reshape long y, i(NAICS) j(YearMonth)
rename y NomSales

* Save
save "`datadir'CANSIM Data\NominalSales_Monthly_92to2015.dta", replace
* ----------------------------------------------------------------------------


* ----------------------------------------------------------------------------
* Step 3: Import and clean producer price index - Year-Month level
* Cansim data used: http://www5.statcan.gc.ca/cansim/a26;jsessionid=0EE8D489EBD319D2C2F2913CC03B28AB
* ----------------------------------------------------------------------------
****
* Clean prices
****
* Import
clear
import excel using "`datadir'CANSIM Data\ProducerPriceIndex_92to2015.xlsx", firstrow

* Rename variables
{
rename	B	y199201
rename	C	y199202
rename	D	y199203
rename	E	y199204
rename	F	y199205
rename	G	y199206
rename	H	y199207
rename	I	y199208
rename	J	y199209
rename	K	y199210
rename	L	y199211
rename	M	y199212
rename	N	y199301
rename	O	y199302
rename	P	y199303
rename	Q	y199304
rename	R	y199305
rename	S	y199306
rename	T	y199307
rename	U	y199308
rename	V	y199309
rename	W	y199310
rename	X	y199311
rename	Y	y199312
rename	Z	y199401
rename	AA	y199402
rename	AB	y199403
rename	AC	y199404
rename	AD	y199405
rename	AE	y199406
rename	AF	y199407
rename	AG	y199408
rename	AH	y199409
rename	AI	y199410
rename	AJ	y199411
rename	AK	y199412
rename	AL	y199501
rename	AM	y199502
rename	AN	y199503
rename	AO	y199504
rename	AP	y199505
rename	AQ	y199506
rename	AR	y199507
rename	AS	y199508
rename	AT	y199509
rename	AU	y199510
rename	AV	y199511
rename	AW	y199512
rename	AX	y199601
rename	AY	y199602
rename	AZ	y199603
rename	BA	y199604
rename	BB	y199605
rename	BC	y199606
rename	BD	y199607
rename	BE	y199608
rename	BF	y199609
rename	BG	y199610
rename	BH	y199611
rename	BI	y199612
rename	BJ	y199701
rename	BK	y199702
rename	BL	y199703
rename	BM	y199704
rename	BN	y199705
rename	BO	y199706
rename	BP	y199707
rename	BQ	y199708
rename	BR	y199709
rename	BS	y199710
rename	BT	y199711
rename	BU	y199712
rename	BV	y199801
rename	BW	y199802
rename	BX	y199803
rename	BY	y199804
rename	BZ	y199805
rename	CA	y199806
rename	CB	y199807
rename	CC	y199808
rename	CD	y199809
rename	CE	y199810
rename	CF	y199811
rename	CG	y199812
rename	CH	y199901
rename	CI	y199902
rename	CJ	y199903
rename	CK	y199904
rename	CL	y199905
rename	CM	y199906
rename	CN	y199907
rename	CO	y199908
rename	CP	y199909
rename	CQ	y199910
rename	CR	y199911
rename	CS	y199912
rename	CT	y200001
rename	CU	y200002
rename	CV	y200003
rename	CW	y200004
rename	CX	y200005
rename	CY	y200006
rename	CZ	y200007
rename	DA	y200008
rename	DB	y200009
rename	DC	y200010
rename	DD	y200011
rename	DE	y200012
rename	DF	y200101
rename	DG	y200102
rename	DH	y200103
rename	DI	y200104
rename	DJ	y200105
rename	DK	y200106
rename	DL	y200107
rename	DM	y200108
rename	DN	y200109
rename	DO	y200110
rename	DP	y200111
rename	DQ	y200112
rename	DR	y200201
rename	DS	y200202
rename	DT	y200203
rename	DU	y200204
rename	DV	y200205
rename	DW	y200206
rename	DX	y200207
rename	DY	y200208
rename	DZ	y200209
rename	EA	y200210
rename	EB	y200211
rename	EC	y200212
rename	ED	y200301
rename	EE	y200302
rename	EF	y200303
rename	EG	y200304
rename	EH	y200305
rename	EI	y200306
rename	EJ	y200307
rename	EK	y200308
rename	EL	y200309
rename	EM	y200310
rename	EN	y200311
rename	EO	y200312
rename	EP	y200401
rename	EQ	y200402
rename	ER	y200403
rename	ES	y200404
rename	ET	y200405
rename	EU	y200406
rename	EV	y200407
rename	EW	y200408
rename	EX	y200409
rename	EY	y200410
rename	EZ	y200411
rename	FA	y200412
rename	FB	y200501
rename	FC	y200502
rename	FD	y200503
rename	FE	y200504
rename	FF	y200505
rename	FG	y200506
rename	FH	y200507
rename	FI	y200508
rename	FJ	y200509
rename	FK	y200510
rename	FL	y200511
rename	FM	y200512
rename	FN	y200601
rename	FO	y200602
rename	FP	y200603
rename	FQ	y200604
rename	FR	y200605
rename	FS	y200606
rename	FT	y200607
rename	FU	y200608
rename	FV	y200609
rename	FW	y200610
rename	FX	y200611
rename	FY	y200612
rename	FZ	y200701
rename	GA	y200702
rename	GB	y200703
rename	GC	y200704
rename	GD	y200705
rename	GE	y200706
rename	GF	y200707
rename	GG	y200708
rename	GH	y200709
rename	GI	y200710
rename	GJ	y200711
rename	GK	y200712
rename	GL	y200801
rename	GM	y200802
rename	GN	y200803
rename	GO	y200804
rename	GP	y200805
rename	GQ	y200806
rename	GR	y200807
rename	GS	y200808
rename	GT	y200809
rename	GU	y200810
rename	GV	y200811
rename	GW	y200812
rename	GX	y200901
rename	GY	y200902
rename	GZ	y200903
rename	HA	y200904
rename	HB	y200905
rename	HC	y200906
rename	HD	y200907
rename	HE	y200908
rename	HF	y200909
rename	HG	y200910
rename	HH	y200911
rename	HI	y200912
rename	HJ	y201001
rename	HK	y201002
rename	HL	y201003
rename	HM	y201004
rename	HN	y201005
rename	HO	y201006
rename	HP	y201007
rename	HQ	y201008
rename	HR	y201009
rename	HS	y201010
rename	HT	y201011
rename	HU	y201012
rename	HV	y201101
rename	HW	y201102
rename	HX	y201103
rename	HY	y201104
rename	HZ	y201105
rename	IA	y201106
rename	IB	y201107
rename	IC	y201108
rename	ID	y201109
rename	IE	y201110
rename	IF	y201111
rename	IG	y201112
rename	IH	y201201
rename	II	y201202
rename	IJ	y201203
rename	IK	y201204
rename	IL	y201205
rename	IM	y201206
rename	IN	y201207
rename	IO	y201208
rename	IP	y201209
rename	IQ	y201210
rename	IR	y201211
rename	IS	y201212
rename	IT	y201301
rename	IU	y201302
rename	IV	y201303
rename	IW	y201304
rename	IX	y201305
rename	IY	y201306
rename	IZ	y201307
rename	JA	y201308
rename	JB	y201309
rename	JC	y201310
rename	JD	y201311
rename	JE	y201312
rename	JF	y201401
rename	JG	y201402
rename	JH	y201403
rename	JI	y201404
rename	JJ	y201405
rename	JK	y201406
rename	JL	y201407
rename	JM	y201408
rename	JN	y201409
rename	JO	y201410
rename	JP	y201411
rename	JQ	y201412
rename	JR	y201501
rename	JS	y201502
rename	JT	y201503
rename	JU	y201504
rename	JV	y201505
rename	JW	y201506
rename	JX	y201507
rename	JY	y201508
rename	JZ	y201509
rename	KA	y201510
rename	KB	y201511
rename	KC	y201512
}

/*
* Create new sector definitions
gen SectorNew = "Chemicals" if NAICS=="325"
replace SectorNew = "Abrasives"	if NAICS=="32791"
replace SectorNew = "Food"	if NAICS=="311" | NAICS=="312"
replace SectorNew = "Electronics"	if NAICS=="334" | NAICS=="335"
replace SectorNew = "Glass"	if NAICS=="3272"
replace SectorNew = "Plastics"	if NAICS=="3261"
replace SectorNew = "Paper"	if NAICS=="322"
replace SectorNew = "Textiles"	if NAICS=="313" | NAICS=="314" | NAICS=="315"
replace SectorNew = "Transportation Equipment"	if NAICS=="336"
replace SectorNew = "Wood"	if NAICS=="321"
replace SectorNew = "Rubber"	if NAICS=="3262"
replace SectorNew = "Asphalt"	if NAICS=="32412"
replace SectorNew = "Cement and Concrete"	if NAICS=="3273"
replace SectorNew = "Lime and Gypsum"	if NAICS=="3274"
replace SectorNew = "Other Minerals"	if NAICS=="3271" | NAICS=="32799"
replace SectorNew = "Petroleum Refining"	if NAICS=="32411" | NAICS=="32419"
replace SectorNew = "Printing"	if NAICS=="323"
replace SectorNew = "Metals"	if NAICS=="331" | NAICS=="332"
replace SectorNew = "Other"	if NAICS=="316" | NAICS=="333" | NAICS=="337" | NAICS=="339"

* Drop old sector
drop NAICS
*/

* Recode NAICS
destring NAICS, replace

* Collapse to year-month on new sector
*collapse (sum) y*, by(SectorNew)

* Reshape from wide to long
reshape long y, i(NAICS) j(YearMonth)
rename y PriceInd

* Sort
sort NAICS YearMonth

* Save
save "`datadir'CANSIM Data\PriceIndex.dta", replace
****
* ----------------------------------------------------------------------------


* ----------------------------------------------------------------------------
* Step 4: Merge price index with sales, collapse to yearlevel
* ----------------------------------------------------------------------------
* Open sales as base
use"`datadir'CANSIM Data\NominalSales_Monthly_92to2015.dta", clear
sort NAICS YearMonth
merge 1:1 NAICS YearMonth using "`datadir'CANSIM Data\PriceIndex.dta", gen(_mergePriceInd)
drop _mergePriceInd

* Create real sales
gen RealSales = (NomSales*100)/PriceInd

* Restring NAICS
tostring NAICS, replace

* Create new sector definitions
gen SectorNew = "Chemicals" if NAICS=="325"
replace SectorNew = "Abrasives"	if NAICS=="32791"
replace SectorNew = "Food"	if NAICS=="311" | NAICS=="312"
replace SectorNew = "Electronics"	if NAICS=="334" | NAICS=="335"
replace SectorNew = "Glass"	if NAICS=="3272"
replace SectorNew = "Plastics"	if NAICS=="3261"
replace SectorNew = "Paper"	if NAICS=="322"
replace SectorNew = "Textiles"	if NAICS=="313" | NAICS=="314" | NAICS=="315"
replace SectorNew = "Transportation Equipment"	if NAICS=="336"
replace SectorNew = "Wood"	if NAICS=="321"
replace SectorNew = "Rubber"	if NAICS=="3262"
replace SectorNew = "Asphalt"	if NAICS=="32412"
replace SectorNew = "Cement and Concrete"	if NAICS=="3273"
replace SectorNew = "Lime and Gypsum"	if NAICS=="3274"
replace SectorNew = "Other Minerals"	if NAICS=="3271" | NAICS=="32799"
replace SectorNew = "Petroleum Refining"	if NAICS=="32411" | NAICS=="32419"
replace SectorNew = "Printing"	if NAICS=="323"
replace SectorNew = "Metals"	if NAICS=="331" | NAICS=="332"
replace SectorNew = "Other"	if NAICS=="316" | NAICS=="333" | NAICS=="337" | NAICS=="339"
replace SectorNew = "Total"	if NAICS=="999"

* Drop old sector
drop NAICS

* Drop deflator
drop PriceInd

* Create year variable
gen Year = int(YearMonth*0.01)

* Collapse to year on new sector
collapse (sum) NomSales RealSales, by(SectorNew Year)

* Save
save "`datadir'CANSIM Data\NominalandRealSales_92to2015.dta", replace
* ----------------------------------------------------------------------------


* ----------------------------------------------------------------------------
* Step 5: Merge sales with pollution. This creates the final dataset used in the
* decomposition exercise.
* ----------------------------------------------------------------------------
* Use pollution panel as base
use "`datadir'APEI Data\PollutionPanel_1990to2015.dta", clear
sort SectorNew Year
merge m:1 SectorNew Year using "`datadir'CANSIM Data\NominalandRealSales_92to2015.dta", gen(_mergeSales)
drop if _mergeSales==1
drop _mergeSales

* Save
save "`datadir'PollutionAndSalesPanel_92to2015.dta", replace
* ----------------------------------------------------------------------------
*/

* ----------------------------------------------------------------------------
* Step 6: Decomposition of sector pollution intensity
* ----------------------------------------------------------------------------
* Open data
use "`datadir'PollutionAndSalesPanel_92to2015.dta", clear

* Duplicate year indicator
bysort Year: gen dupYear = cond(_N==1,0,_n)

* Total sales each year
bysort Year: egen RealSales_TOTALYear = total(RealSales), missing
gen Share = RealSales/RealSales_TOTALYear

* Base year share at industry level
gen Share_92store = Share if Year==1992
bysort SectorNew (Share_92store): gen Share_92 = Share_92store[1]

foreach k in TPMT PM10T PM25T SOXT NOXT VOCT COT {

	****
	* Pollution levels - Actual Change
	****
	* Total pollution each year
	bysort Year: egen `k'_TOTALYear = total(`k'), missing

	* 92 to 15 - Emissions
	gen `k'_TOTALYear92store = `k'_TOTALYear if Year==1992
	sort `k'_TOTALYear92store
	gen `k'_TOTALYear92 = `k'_TOTALYear92store[1]
	gen `k'_Emissions_Full = ((`k'_TOTALYear - `k'_TOTALYear92)/`k'_TOTALYear92) if Year==2015
	****

	****
	* Aggregate pollution intensity - Actual Change
	****
	gen `k'_TOTALPolIntense = `k'_TOTALYear/RealSales_TOTALYear
	
	* 92 to 15 - Emissions intensity 
	gen `k'_TOTALPolIntense92store = `k'_TOTALPolIntense if Year==1992
	sort `k'_TOTALPolIntense92store
	gen `k'_TOTALPolIntense92 = `k'_TOTALPolIntense92store[1]
	gen `k'_PolIntense_Full = ((`k'_TOTALPolIntense - `k'_TOTALPolIntense92)/`k'_TOTALPolIntense92) if Year==2015
	
	* 2004 to 10 - Emissions intensity 
	gen `k'_TOTALPolIntense04store = `k'_TOTALPolIntense if Year==2004
	sort `k'_TOTALPolIntense04store
	gen `k'_TOTALPolIntense04 = `k'_TOTALPolIntense04store[1]
	gen `k'_PolIntense_04to10 = ((`k'_TOTALPolIntense - `k'_TOTALPolIntense04)/`k'_TOTALPolIntense04) if Year==2010
	****
	
	****
	* Decomposition - Use Laspeyre's index
	****
	* Industry pollution intensity
	gen `k'PolIntense = `k'/RealSales
	gen `k'PolIntense_92store = `k'PolIntense if Year==1992
	bysort SectorNew (`k'PolIntense_92store): gen `k'PolIntense_92 = `k'PolIntense_92store[1]
	
	* Composition Effect
	* Component 1: Laspeyres index in each year
	gen Comp_Numerator`k' = `k'PolIntense_92*Share
	bysort Year: egen Comp_Numerator`k'_SumYear = total(Comp_Numerator`k'), missing
	gen Comp_Denominator`k' = `k'PolIntense_92*Share_92
	bysort Year: egen Comp_Denom`k'_SumYear = total(Comp_Denominator`k'), missing
	gen `k'_CompLasperes = Comp_Numerator`k'_SumYear/Comp_Denom`k'_SumYear
	* Component 2: Change in index relative to 1992
	gen `k'_CompLaspScaled92store = `k'_CompLasperes if Year==1992
	sort `k'_CompLaspScaled92store
	gen `k'_CompLasperesScaled92 = `k'_CompLaspScaled92store[1]
	gen `k'_Comp_Full = ((`k'_CompLasperes - `k'_CompLasperesScaled92)/`k'_CompLasperesScaled92) if Year==2015
	
	* Industry Pollution Intensity
	* Component 1: Laspeyres index in each year
	gen Tech_Numerator`k' = `k'PolIntense*Share_92
	bysort Year: egen Tech_Numerator`k'_SumYear = total(Tech_Numerator`k'), missing
	gen Tech_Denominator`k' = `k'PolIntense_92*Share_92
	bysort Year: egen Tech_Denom`k'_SumYear = total(Tech_Denominator`k'), missing
	gen `k'_TechLasperes = Tech_Numerator`k'_SumYear/Tech_Denom`k'_SumYear
	* Component 2: Change in index relative to 1992
	gen `k'_TechLaspScaled92store = `k'_TechLasperes if Year==1992
	sort `k'_TechLaspScaled92store
	gen `k'_TechLasperesScaled92 = `k'_TechLaspScaled92store[1]
	gen `k'_Tech_Full = ((`k'_TechLasperes - `k'_TechLasperesScaled92)/`k'_TechLasperesScaled92) if Year==2015
	****
	
	* Change in total pollution from base
	gen `k'_TOTALrel = `k'_TOTALYear/`k'_TOTALYear92
	
	* Change in pollution intensity from base year
	gen `k'_TOTALPolIntenseRel = `k'_TOTALPolIntense/`k'_TOTALPolIntense92
}
* ----------------------------------------------------------------------------

* ----------------------------------------------------------------------------
* Step 7: Print results for Table 1
* ----------------------------------------------------------------------------
**************
* Aggregate pollution intensity change from 1992 to 2015 - Column 1, Table 1
**************
* PM 2.5
tab PM25T_PolIntense_Full
* NOx
tab NOXT_PolIntense_Full
* VOC
tab VOCT_PolIntense_Full
* CO
tab COT_PolIntense_Full
**************

**************
* Industry Technique Effect from 1992 to 2015 - Column 2, Table 1
**************
* PM 2.5
tab PM25T_Tech_Full
* NOx
tab NOXT_Tech_Full
* VOC
tab VOCT_Tech_Full
* CO
tab COT_Tech_Full
**************
* ----------------------------------------------------------------------------

* ----------------------------------------------------------------------------
* Step 8: Produce figure 1 showing pollution intensity over time.  
* ----------------------------------------------------------------------------
**************
* Pollution Intensity Graph - Figure 1
**************
graph twoway connected PM25T_TOTALPolIntenseRel NOXT_TOTALPolIntenseRel VOCT_TOTALPolIntenseRel COT_TOTALPolIntenseRel Year if dupYear<=1 & Year>=1992 & Year<=2015, sort xlabel(1992 1999 2007 2015) ///
	legend(r(1) holes(3 4 5) colgap(*.35) order(1 "PM{subscript:2.5}" 2 "NO{subscript:X}" 3 "VOC" 4 "CO") textfirst)  graphregion(fcolor(white) lcolor(none)) ///
	msymbol(d t s o) mcolor(maroon dkgreen orange navy) lcolor(maroon dkgreen orange navy) lpattern(longdash dash shortdash dot) lwidth(medthick medthick medthick medthick) 
**************
* ----------------------------------------------------------------------------
